* Load the accident-level IV sample and reduce it to the estimation sample.
use "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta", clear

* NONEMP: 1 when ABS==1, 0 when ABS==0, missing for any other ABS value.
gen NONEMP = 1 if ABS==1
	recode NONEMP (.=0) if ABS==0
	
* treated: health shock (HS==1) together with NONEMP==1; 0 for everyone else.
gen treated = 1 if HS==1 & (NONEMP==1)
	recode treated (.=0)
* control: no health shock (HS==0) and a non-missing NONEMP; 0 for everyone else.
gen control = 1 if HS==0 & (NONEMP==1 | NONEMP==0)
	recode control (.=0)	
	
* Month of the year (1-12) of t; a remainder of 0 is recoded to 12.
gen month_shock = t-12*int(t/12)
	recode month_shock (0=12)
gen month_return = month_shock+d
* diff_year is 1 when month_shock+d runs past month 12 and missing otherwise
* (it is never set to 0). Stata orders missing values above every number,
* so without the explicit guard a missing month_return would satisfy the
* comparison and be flagged as 1.
gen diff_year = 1 if month_return >= 13 & !missing(month_return)

keep if treated == 1 | control == 1
	
* NOTE(review): the comparison below is also true when d is missing, so rows
* with a missing d survive this filter - confirm that this is intended.
keep if d>=3
keep if NONEMP !=.

keep anon tmerge_t t d maxt_clue mint_clu flagEMPml1 flagEMPm0 diff_year ABS HS treated control ferfi kor
        
*tmerge_t        month of the shock 
*t		month of merging firm-level data in m=0 or m=-1 (=tmerge_t if flagEMPm0==1; =tmerge_t-1 if flagEMPml1==1)
*maxt_clue       last month of the clustered accident (t in which m=0 for the last time)
*mint_clu        first month of the clustered accident (t in which m=0 for the first time)
*flagEMPml1      indicator for treated being EMP in one month before the accident
*flagEMPm0       indicator for treated being EMP in the month of the accident

* Attach the month-t employment record and the 4-digit occupation codes.
* Sample rows without a match are kept; rows present only in the using
* files are not added.
merge 1:1 anon t using "$in/admin3_alap.dta", ///
	keepusing(vallazon1 w1 wh1 fogvisz1 feor1_h2 teaor1_h1) keep(master match) nogenerate
merge 1:1 anon t using "$in/admin3_feor.dta", ///
	keepusing(feor1_2003_4 feor1_2008_4) keep(master match) nogenerate

* Plan for the rest of the file: add controls and outcome variables
*   outcomes:            lnw1_restr FEj - in relative_time 0,1,2,3
*   additional outcomes: FO mean_tfp_wr - in relative time 0,1,2,3
*   controls:            kor occupation teaor1_h1 ferfi mean_lag_w log_health_12mo A_12mo-V_12mo FEi_imp flag_missingFEi size_pre_imp FO_pre_imp logmean_w_pre_imp flag_missingsize_pre flag_missingFO_pre flag_missinglogmean_w_pre flag_missingFEj_l4 - FEj_l12_imp
* one row is an anon-t-d triplet

* Person fixed effect: every variable of the using file is brought in,
* the match pattern is tabulated, and rows found only in the using file
* are discarded.
merge m:1 anon using "$out_AKM/FEi_full_Jan2023.dta"
tabulate _merge
drop if _merge == 2
drop _merge
rename anon_fix FEi

* step 1: add the t-related

* assign occupation in t
* The 2-digit harmonized occupation code is collapsed into groups coded
* 0-7 (the value 2 is never assigned); 0 marks a missing code. Codes that
* match no rule below (16, for instance) leave occupation missing.
rename feor1_h2 feor1_h2_t
gen occupation=.
	replace occupation=0 if missing(feor1_h2_t)
	replace occupation=3 if feor1_h2_t==11
	replace occupation=4 if feor1_h2_t==12
	replace occupation=7 if feor1_h2_t==13
	replace occupation=1 if feor1_h2_t==14
	replace occupation=1 if feor1_h2_t==15
	replace occupation=3 if feor1_h2_t>=17 & feor1_h2_t<=23
	replace occupation=4 if feor1_h2_t>=24 & feor1_h2_t<=33
	replace occupation=5 if feor1_h2_t>=34 & feor1_h2_t<=43
	replace occupation=6 if feor1_h2_t>=44 & feor1_h2_t<=47
	replace occupation=7 if feor1_h2_t==48	 
rename vallazon1 vallazon
* calendar year of month t (t==1 falls in 2003)
gen ev=2003+int((t-1)/12)
* Firm-by-year characteristics. The path is built from the out global
* alone, like every other reference to that folder in this file; a
* leading slash in front of the global only resolves to the same file
* when the global itself already starts at the filesystem root.
merge m:1 vallazon ev using "$out/firmquality.dta", nogen keep(master match) keepusing(letszam FO mean_firmw mean_tfp_wr)
* Suffix the month-t values with _t, translate the Hungarian stems
* (mean_firmw to mean_w, letszam to size), then turn the four firm
* characteristics into _pre variables.
foreach var of varlist mean_firmw FO letszam mean_tfp_wr feor1_2003_4 feor1_2008_4{
	rename `var' `var'_t
}
rename *mean_firmw* *mean_w*
rename *letszam* *size*
foreach x in size FO mean_w mean_tfp_wr{
	rename 	`x'_t `x'_pre
}
* FO and TFP are also kept under the _t name (the _pre versions are later
* replaced by imputed versions).
gen FO_t=FO_pre
gen mean_tfp_wr_t=mean_tfp_wr_pre
gen logmean_w_pre=log(mean_w_pre)
drop mean_w_pre

* add l1-l3 values of restricted wage, firm FE, hours worked and additional wage variables

* The original month is kept as t_pre. Each row gets an id n and is copied
* four times; the copies cover months tmerge_t-3 through tmerge_t.
rename t t_pre
gen n = _n
expand 4
egen t = seq(), by(n)
replace t = t - 4 + tmerge_t

* Month-t values move to a _t suffix so the monthly merge below can bring
* in the same variable names for the expanded months.
rename (vallazon w1 wh1 fogvisz1) (vallazon_t w1_t wh1_t fogvisz1_t)

merge m:1 anon t using "$in/admin3_alap.dta", ///
	keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1 ) keep(master match) nogenerate
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", ///
	keepusing(vall_fix) keep(master match) nogenerate
rename vall_fix FEj
merge 1:1 anon t using "$in/admin3_passziv.dta", ///
	keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays

* EMP is 1 (missing otherwise) when the row has a firm id, a positive wage
* or positive hours, SLdays below 6 or missing, and fogvisz1 equal to 201.
gen EMP = 1 if (vallazon!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) ///
	& (SLdays<6 | SLdays==.) & (fogvisz1==201)

* Diagnostics on the month before tmerge_t; the lag operators need xtset.
tabulate EMP if t==tmerge_t-1, missing
tabulate treated if EMP==. & t==tmerge_t-1
xtset anon t
tabulate treated if EMP==. & l.EMP==. & l2.EMP==. & t==tmerge_t-1

* create a winsorized version of the firm FE variable, only for the EMP=1
* The 1st and 99th percentiles are taken over distinct (vallazon, FEj)
* pairs seen with EMP==1, so each firm counts once however many rows it
* has. The work is done on a preserved copy and merged back by firm id.
preserve
keep if EMP==1
keep vallazon FEj
duplicates drop
gen FEj_wins=FEj
egen FEj1=pctile(FEj), p(1)
egen FEj99=pctile(FEj), p(99)
* lower tail: values that are exactly 0 are left untouched
replace FEj_wins=FEj1 if FEj_wins<FEj1 & FEj_wins!=0 
* upper tail: the !=. test stops a missing FEj from being set to the 99th percentile
replace FEj_wins=FEj99 if FEj_wins>FEj99 & FEj_wins!=. 
drop FEj1 FEj99
keep vallazon FEj_wins
tempfile FEjwins
save `FEjwins'
restore
* Firms never observed with EMP==1 are not in the temporary file and end
* up with a missing FEj_wins.
merge m:1 vallazon using `FEjwins'
drop if _merge==2
drop _merge

* create a winsorized version of the wage variable, only for the EMP=1
* Percentiles are computed separately for every month t among rows with
* EMP==1; rows with EMP different from 1 keep the raw wage, and a wage of
* exactly 0 is never raised to the 1st percentile.
gen w1_wins=w1
egen wp1=pctile(w1) if EMP==1, by(t) p(1)
egen wp99=pctile(w1) if EMP==1, by(t) p(99)
replace w1_wins=wp1 if w1_wins<wp1 & w1_wins!=0 & EMP==1
replace w1_wins=wp99 if w1_wins>wp99 & w1_wins!=. & EMP==1
drop wp1 wp99

* create a winsorized version of w1_t
* NOTE(review): w1_t is the wage of the original month, yet the percentile
* groups and the EMP filter use t and EMP of the expanded rows, so the
* result can differ between the copies of one n - confirm this is intended.
gen w1_wins_t=w1_t
egen wp1_t=pctile(w1_t) if EMP==1, by(t) p(1)
egen wp99_t=pctile(w1_t) if EMP==1, by(t) p(99)
replace w1_wins_t=wp1_t if w1_wins_t<wp1_t & w1_wins_t!=0 & EMP==1
replace w1_wins_t=wp99_t if w1_wins_t>wp99_t & w1_wins_t!=. & EMP==1
drop wp1_t wp99_t

* Calendar year of each expanded month t.
replace ev = 2003 + int((t-1)/12)

* Yearly deflator with 2003 = 1; the list below runs from 2003 to 2017 in
* order, and years outside that range leave defl missing.
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev == `yr'
	local ++yr
}

* deflate the raw and the winsorized wage
gen w1_defl      = w1/defl
gen w1_wins_defl = w1_wins/defl

* deflated wages and hours worked are blanked in months where EMP is not 1
foreach v of varlist w1_defl w1_wins_defl wh1 {
	replace `v' = . if EMP != 1
}

* censoring hours worked to the 20-40 range (missing hours stay missing)
replace wh1 = 40 if wh1 > 40 & wh1 != .
replace wh1 = 20 if wh1 < 20

* creating month of the year
gen ho = t-int(t/12)*12
	recode ho(0=12)

* alternative wage variables 
*
* The outer loop runs once for the raw wage (empty suffix) and once for the
* winsorized wage (_wins). Inside it:
*   w_nap, w_jnap  deflated wage divided by the share of the month covered
*                  by nap or jnap days; the plain deflated wage is used
*                  when the day count is missing
*   lnw1_restr     log of deflated wage over wh1*30/7
*   lnw1_adj       log of w_nap over wh1*30/7
*   lnw1_nap/jnap  log of w_nap and of w_jnap
*   lnw1_totw      log of the deflated wage
* All but lnw1_totw are left missing when transfer_mtp is 401 or
* transfer_ny is 203.
* February uses 29 days in leap years. 2016 is included because the
* deflator table reaches 2017; leaving it out would divide by 28 days in
* February 2016.
* Each variable then gets a _t0 version: its mean over the rows of the same
* n with t before tmerge_t, spread to every row of that n.

foreach y in "" _wins{	
	foreach x in "" j{
		gen w`y'_`x'nap = w1`y'_defl/(`x'nap/31) if ho==1 | ho==3 | ho==5 | ho==7 | ho==8 | ho==10 | ho==12
			replace w`y'_`x'nap = w1`y'_defl/(`x'nap/30) if ho==4 | ho==6 | ho==9 | ho==11
			replace w`y'_`x'nap = w1`y'_defl/(`x'nap/29) if ho==2 & inlist(ev, 2004, 2008, 2012, 2016)
			replace w`y'_`x'nap = w1`y'_defl/(`x'nap/28) if ho==2 & !inlist(ev, 2004, 2008, 2012, 2016)
			replace w`y'_`x'nap = w1`y'_defl if `x'nap==.
	}
	gen lnw1`y'_restr=ln((w1`y'_defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_adj=ln((w`y'_nap)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_nap=ln(w`y'_nap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_jnap=ln(w`y'_jnap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_totw=ln(w1`y'_defl)
		
	foreach X in lnw1`y'_restr lnw1`y'_adj lnw1`y'_nap lnw1`y'_jnap lnw1`y'_totw FEj`y'{
		egen `X'_t_=mean(`X') if t<tmerge_t, by(n)
		egen `X'_t0=min(`X'_t_), by(n)
		drop `X'_t_
	}
}

* same pre-period mean for hours worked
foreach X in wh1{
	egen `X'_t_=mean(`X') if t<tmerge_t, by(n)
	egen `X'_t0=min(`X'_t_), by(n)
	drop `X'_t_
}

* back to one row per n: keep the tmerge_t copy and drop the monthly inputs
keep if t==tmerge_t
drop t nap jnap vallazon w1 w1_wins wh1 fogvisz1 transfer_ny transfer_mtp FEj FEj_wins SLdays defl w1_defl w1_wins_defl EMP ho w_nap w_jnap lnw1_restr lnw1_adj lnw1_nap lnw1_jnap lnw1_totw w_wins_nap w_wins_jnap lnw1_wins_restr lnw1_wins_adj lnw1_wins_nap lnw1_wins_jnap lnw1_wins_totw
sum lnw1_restr_t0 lnw1_adj_t0 lnw1_nap_t0 lnw1_jnap_t0 lnw1_totw_t0 FEj_t0 wh1_t0 lnw1_wins_restr_t0 lnw1_wins_adj_t0 lnw1_wins_nap_t0 lnw1_wins_jnap_t0 lnw1_wins_totw_t0 FEj_wins_t0 

* add l4-l12 values of lnw1_restr and FEj 

* Ten copies per n: the first copy stays at tmerge_t (offset 0), copies
* 2-10 get offsets -12 through -4.
expand 10
egen t=seq(), by(n)
replace t=t-14 if t!=1
replace t=0 if t==1
replace t=t +tmerge_t

* monthly employment record, firm FE and SLdays for each expanded month
merge m:1 anon t using "$in/admin3_alap.dta", keep(master match) nogen keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1 )
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keep(master match) nogen keepusing(vall_fix)
rename vall_fix FEj
merge 1:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 SLdays

* EMP is 1 (missing otherwise) when the row has a firm id, a positive wage
* or positive hours, SLdays below 6 or missing, and fogvisz1 equal to 201.
gen EMP=1 if (vallazon!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) & (SLdays<6 | SLdays==.) & (fogvisz1==201)

* Winsorized firm FE: 1st and 99th percentiles over distinct
* (vallazon, FEj) pairs seen with EMP==1 in this expanded sample. Values of
* exactly 0 are not raised; missing values are not lowered.
preserve
keep if EMP==1
keep vallazon FEj
duplicates drop
gen FEj_wins=FEj
egen FEj1=pctile(FEj), p(1)
egen FEj99=pctile(FEj), p(99)
replace FEj_wins=FEj1 if FEj_wins<FEj1 & FEj_wins!=0 
replace FEj_wins=FEj99 if FEj_wins>FEj99 & FEj_wins!=. 
drop FEj1 FEj99
keep vallazon FEj_wins
tempfile FEjwins
save `FEjwins'
restore
* firms never seen with EMP==1 get a missing FEj_wins
merge m:1 vallazon using `FEjwins'
drop if _merge==2
drop _merge

* create a winsorized version of the wage variable, only for the EMP=1
* (percentiles by month t; a wage of exactly 0 is never raised)
gen w1_wins=w1
egen wp1=pctile(w1) if EMP==1, by(t) p(1)
egen wp99=pctile(w1) if EMP==1, by(t) p(99)
replace w1_wins=wp1 if w1_wins<wp1 & w1_wins!=0 & EMP==1
replace w1_wins=wp99 if w1_wins>wp99 & w1_wins!=. & EMP==1
drop wp1 wp99

* Calendar year of each expanded month t.
replace ev = 2003 + int((t-1)/12)

* Yearly deflator with 2003 = 1; the list runs from 2003 to 2017 in order,
* and years outside that range leave defl missing.
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev == `yr'
	local ++yr
}

* censoring hours worked to the 20-40 range (missing hours stay missing)
replace wh1 = 40 if wh1 > 40 & wh1 != .
replace wh1 = 20 if wh1 < 20

* Log deflated wage over wh1*30/7, raw and winsorized. Left missing when
* transfer_mtp is 401 or transfer_ny is 203, and blanked when EMP is not 1.
gen lnw1_restr = log((w1/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
replace lnw1_restr = . if EMP != 1
gen lnw1_wins_restr = log((w1_wins/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
replace lnw1_wins_restr = . if EMP != 1
drop EMP

* mean_lag_w: average of the log wage over the rows of the same n that lie
* before tmerge_t, copied to every row of that n.
tempvar pre_raw pre_wins
egen `pre_raw' = mean(lnw1_restr) if t<tmerge_t, by(n)
egen mean_lag_w = min(`pre_raw'), by(n)
drop `pre_raw'
egen `pre_wins' = mean(lnw1_wins_restr) if t<tmerge_t, by(n)
egen mean_lag_w_wins = min(`pre_wins'), by(n)
drop `pre_wins'

drop w1 w1_wins wh1 transfer_ny transfer_mtp lnw1_restr lnw1_wins_restr vallazon nap jnap fogvisz1 SLdays

* Lagged firm FE controls: flag missing values and fill them with the mean.
* summarize runs on the long data, so the mean pools every row in memory
* at this point (all n and all lags together).
foreach var of varlist FEj FEj_wins{
	gen flag_missing`var' = 1 if `var' == .
		recode flag_missing`var' (.=0)
	gen `var'_imp = `var'
	sum `var'
	replace `var'_imp = `r(mean)' if flag_missing`var' == 1
	drop `var'
}
* time = number of months before tmerge_t; it becomes the numeric suffix
* of the wide variables (0 for the tmerge_t row itself).
gen time=tmerge_t-t
* the trailing _l makes reshape produce names such as FEj_imp_l4
rename flag_missingFEj flag_missingFEj_l
rename flag_missingFEj_wins flag_missingFEj_wins_l
rename FEj_imp FEj_imp_l
rename FEj_wins_imp FEj_wins_imp_l
* everything that still varies within n has to go before the reshape
drop ev defl t 
reshape wide flag_missingFEj_l FEj_imp_l FEj_wins_imp_l flag_missingFEj_wins_l, i(n) j(time)
* move the lag number in front of the _imp suffix
rename FEj_imp_l* FEj_l*_imp    
rename FEj_wins_imp_l* FEj_wins_l*_imp    

* Pre-period firm covariates and the person FE: build a 0/1 flag for a
* missing value and an _imp copy in which missing values are replaced by
* the mean of the non-missing ones; the raw variable is then dropped.
foreach v of varlist size_pre FO_pre logmean_w_pre mean_tfp_wr_pre FEi {
	gen flag_missing`v' = (`v' == .)
	summarize `v'
	gen `v'_imp = cond(`v' == ., `r(mean)', `v')
	drop `v'
}

* add l3-l12 values of health-related variables

* Eleven copies per n: the first stays at tmerge_t, copies 2-11 cover the
* months tmerge_t-12 through tmerge_t-3.
expand 11
egen t = seq(), by(n)
replace t = cond(t == 1, 0, t - 14) + tmerge_t

* monthly health spending and drug-category spending; unmatched sample
* rows are kept
merge m:1 anon t using "$in/admin3_eu_fekvo.dta", ///
	keepusing(fekvo_ft) keep(master match) nogenerate
merge m:1 anon t using "$in/admin3_eu_jaro.dta", ///
	keepusing(jaro_ft_ossz) keep(master match) nogenerate
merge m:1 anon t using "$in/admin3_eu_veny.dta", ///
	keepusing(tbtam_ossz betegft_ossz) keep(master match) nogenerate
merge m:1 anon t using "$in_med/admin3_eu_veny_H2_v2.dta", ///
	keepusing(A_ft B_ft C_ft D_ft G_ft H_ft J_ft L_ft M_ft N_ft P_ft R_ft S_ft V_ft) keep(master match) nogenerate

* a missing spending item counts as zero
foreach item of varlist fekvo_ft jaro_ft_ossz tbtam_ossz betegft_ossz {
	replace `item' = 0 if `item' == .
}

* Total spending over the rows before tmerge_t, copied to every row of n;
* its log is set to 0 when it cannot be computed (zero total).
gen health = fekvo_ft + jaro_ft_ossz + tbtam_ossz + betegft_ossz
tempvar spend_pre spend_all
egen `spend_pre' = sum(health) if t<tmerge_t, by(n)
egen `spend_all' = min(`spend_pre'), by(n)
gen log_health_12mo = log(`spend_all')
replace log_health_12mo = 0 if log_health_12mo == .
drop `spend_pre' `spend_all' fekvo_ft jaro_ft_ossz tbtam_ossz betegft_ossz

* Drug categories: 0/1 indicator for any positive spending in the rows
* before tmerge_t.
foreach grp in A B C D G H J L M N P R S V {
	replace `grp'_ft = 0 if `grp'_ft == .
	replace `grp'_ft = 1 if `grp'_ft > 0 & `grp'_ft != .
	tempvar any_pre
	egen `any_pre' = max(`grp'_ft) if t<tmerge_t, by(n)
	egen `grp'_12mo = min(`any_pre'), by(n)
	drop `any_pre' `grp'_ft
}

* back to one row per n
keep if t==tmerge_t

* NOTE(review): the lag-0 firm FE and its flag are dropped here, but their
* winsorized lag-0 counterparts are not - confirm whether that is intended.
drop t n flag_missingFEj_l0 FEj_l0_imp health

* step 2: add the t+d-related

* one row is an anon-t-d triplet 

* Fourteen copies per row. The first copy has offset 0, copies 2-14 have
* offsets 6, 8, ..., 30. Offsets are counted from tmerge_t+d, or from
* maxt_clue+d when maxt_clue is not missing.
gen n=_n
expand 14
egen t=seq(), by(n)
replace t=t*2+2 if t!=1
replace t=0 if t==1
replace t=tmerge_t+d+t if maxt_clue==.
replace t=maxt_clue+d+t if maxt_clue!=.

* monthly employment record, SLdays, 4-digit occupation codes and firm FE
* for each expanded month
merge m:1 anon t using "$in/admin3_alap.dta", keep(master match) nogen keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1 feor1_h2)
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 SLdays
merge m:1 anon t using "$in/admin3_feor.dta", keep(master match) nogen keepusing(feor1_2003_4 feor1_2008_4)
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keep(master match) nogen keepusing(vall_fix)
rename vall_fix FEj

* EMP is 1 (missing otherwise) when the row has a firm id, a positive wage
* or positive hours, SLdays below 6 or missing, and fogvisz1 equal to 201.
gen EMP=1 if (vallazon!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) & (SLdays<6 | SLdays==.) & (fogvisz1==201)

* Winsorized firm FE: 1st and 99th percentiles over distinct
* (vallazon, FEj) pairs seen with EMP==1 in this expanded sample. Values of
* exactly 0 are not raised; missing values are not lowered.
preserve
keep if EMP==1
keep vallazon FEj
duplicates drop
gen FEj_wins=FEj
egen FEj1=pctile(FEj), p(1)
egen FEj99=pctile(FEj), p(99)
replace FEj_wins=FEj1 if FEj_wins<FEj1 & FEj_wins!=0 
replace FEj_wins=FEj99 if FEj_wins>FEj99 & FEj_wins!=. 
drop FEj1 FEj99
keep vallazon FEj_wins
tempfile FEjwins
save `FEjwins'
restore
* firms never seen with EMP==1 get a missing FEj_wins
merge m:1 vallazon using `FEjwins'
drop if _merge==2
drop _merge

* create a winsorized version of the wage variable, only for the EMP=1
* (percentiles by month t; a wage of exactly 0 is never raised)
gen w1_wins=w1
egen wp1=pctile(w1) if EMP==1, by(t) p(1)
egen wp99=pctile(w1) if EMP==1, by(t) p(99)
replace w1_wins=wp1 if w1_wins<wp1 & w1_wins!=0 & EMP==1
replace w1_wins=wp99 if w1_wins>wp99 & w1_wins!=. & EMP==1
drop wp1 wp99

* Calendar year of each expanded month t.
gen ev = 2003 + int((t-1)/12)

* Yearly deflator with 2003 = 1; the list runs from 2003 to 2017 in order,
* and years outside that range leave defl missing.
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev == `yr'
	local ++yr
}

* deflate the raw and the winsorized wage
gen w1_defl      = w1/defl
gen w1_wins_defl = w1_wins/defl

* deflated wage and hours worked changed to missing if EMP!=1 that period
* (the tabulation first reports treated status for rows without EMP in
* the return month itself)
tab treated if EMP==. & ((t==tmerge_t+d & maxt_clue==.) | (t==maxt_clue+d & maxt_clue!=.))
replace w1_defl=. if EMP!=1
replace w1_wins_defl=. if EMP!=1
replace wh1=. if EMP!=1 

* censoring hours worked to the 20-40 range (missing hours stay missing)
replace wh1=40 if wh1>40 & wh1!=.
replace wh1=20 if wh1<20

* creating month of the year
gen ho = t-int(t/12)*12
	recode ho(0=12)

* alternative wage variables 
*
* The outer loop runs once for the raw wage (empty suffix) and once for the
* winsorized wage (_wins). w_nap and w_jnap divide the deflated wage by the
* share of the month covered by nap or jnap days; the plain deflated wage
* is used when the day count is missing. The log wages other than
* lnw1_totw are left missing when transfer_mtp is 401 or transfer_ny is 203.
* February uses 29 days in leap years. 2016 is included because the
* deflator table reaches 2017; leaving it out would divide by 28 days in
* February 2016.

foreach Y in "" _wins{
	foreach x in "" j{
		gen w`Y'_`x'nap = w1`Y'_defl/(`x'nap/31) if ho==1 | ho==3 | ho==5 | ho==7 | ho==8 | ho==10 | ho==12
			replace w`Y'_`x'nap = w1`Y'_defl/(`x'nap/30) if ho==4 | ho==6 | ho==9 | ho==11
			replace w`Y'_`x'nap = w1`Y'_defl/(`x'nap/29) if ho==2 & inlist(ev, 2004, 2008, 2012, 2016)
			replace w`Y'_`x'nap = w1`Y'_defl/(`x'nap/28) if ho==2 & !inlist(ev, 2004, 2008, 2012, 2016)
			replace w`Y'_`x'nap = w1`Y'_defl if `x'nap==.
	}

	gen lnw1`Y'_restr=ln((w1`Y'_defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`Y'_adj=ln((w`Y'_nap)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`Y'_nap=ln(w`Y'_nap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`Y'_jnap=ln(w`Y'_jnap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`Y'_totw=ln(w1`Y'_defl)
}

* months elapsed since the return month (tmerge_t+d, or maxt_clue+d for a
* clustered accident)
gen rel_time=t-d-tmerge_t if maxt_clue==.
replace rel_time=t-d-maxt_clue if maxt_clue!=.
tab rel_time, m

* Window means, copied to every row of n: td1y averages the rows with
* rel_time 6 to 18, td2y the rows with rel_time 18 to 30. The row at
* rel_time 18 enters both windows.
foreach X in lnw1_restr lnw1_adj lnw1_nap lnw1_jnap lnw1_totw FEj wh1 lnw1_wins_restr lnw1_wins_adj lnw1_wins_nap lnw1_wins_jnap lnw1_wins_totw FEj_wins{
	egen `X'_td1y_=mean(`X') if rel_time>=6 & rel_time<=18, by(n)
	egen `X'_td1y=min(`X'_td1y_), by(n)
	drop `X'_td1y_
	egen `X'_td2y_=mean(`X') if rel_time>=18 & rel_time<=30, by(n)
	egen `X'_td2y=min(`X'_td2y_), by(n)
	drop `X'_td2y_
}

* 2-digit and 4-digit occupation and firm id at the time of returning 
*
* For each source variable three copies are built, constant within n:
*   _t1  value in the row with rel_time==0
*   _t2  value at rel_time==12; when that is missing, the first non-missing
*        value among the rows 2, 4 and 6 months away, trying the earlier
*        month before the later one at each distance
*   _t3  same as _t2 around rel_time==24
* The lag and lead operators rely on the panel declaration below.

xtset anon t

* 4-digit occupation codes: feor1_2003_4 and feor1_2008_4 become
* feor1_2003_t1 ... feor1_2008_t3
foreach yr in 2003 2008 {
	local src feor1_`yr'_4
	local out feor1_`yr'
	gen `out'_t1_ = `src' if rel_time==0
	egen `out'_t1 = max(`out'_t1_), by(n)
	drop `out'_t1_
	local h = 12
	foreach k in 2 3 {
		gen `out'_t`k'_ = `src' if rel_time==`h'
		foreach g in 2 4 6 {
			replace `out'_t`k'_ = l`g'.`src' if `out'_t`k'_==. & rel_time==`h'
			replace `out'_t`k'_ = f`g'.`src' if `out'_t`k'_==. & rel_time==`h'
		}
		egen `out'_t`k' = max(`out'_t`k'_), by(n)
		drop `out'_t`k'_
		local h = `h' + 12
	}
}

* 2-digit occupation code and firm id. These copies are stored as double:
* the default float type holds integers exactly only up to 16,777,216, so
* a larger firm id would be rounded and no longer equal vallazon_t, which
* keeps its original storage type.
foreach X in feor1_h2 vallazon {
	gen double `X'_t1_ = `X' if rel_time==0
	egen double `X'_t1 = max(`X'_t1_), by(n)
	drop `X'_t1_
	local h = 12
	foreach k in 2 3 {
		gen double `X'_t`k'_ = `X' if rel_time==`h'
		foreach g in 2 4 6 {
			replace `X'_t`k'_ = l`g'.`X' if `X'_t`k'_==. & rel_time==`h'
			replace `X'_t`k'_ = f`g'.`X' if `X'_t`k'_==. & rel_time==`h'
		}
		egen double `X'_t`k' = max(`X'_t`k'_), by(n)
		drop `X'_t`k'_
		local h = `h' + 12
	}
}


* mean tfp, FO upon return to work

* firm-by-year characteristics for each expanded month
merge m:1 vallazon ev using "$out/firmquality.dta", ///
	keepusing(FO mean_tfp_wr) keep(master match) nogenerate
xtset anon t

* _t1 is the value at rel_time==0. _t2 and _t3 are the values at
* rel_time==12 and rel_time==24; when missing they fall back to the first
* non-missing value 2, 4 or 6 months away (earlier month tried first).
* All three are copied to every row of n.
foreach X in FO mean_tfp_wr {
	gen `X'_t1_ = `X' if rel_time==0
	egen `X'_t1 = max(`X'_t1_), by(n)
	drop `X'_t1_
	local h = 12
	foreach k in 2 3 {
		gen `X'_t`k'_ = `X' if rel_time==`h'
		foreach g in 2 4 6 {
			replace `X'_t`k'_ = l`g'.`X' if `X'_t`k'_==. & rel_time==`h'
			replace `X'_t`k'_ = f`g'.`X' if `X'_t`k'_==. & rel_time==`h'
		}
		egen `X'_t`k' = max(`X'_t`k'_), by(n)
		drop `X'_t`k'_
		local h = `h' + 12
	}
}

* back to one row per n: the return month
keep if rel_time==0

* the return-month value becomes _t1, the two window means _t2 and _t3
foreach X in lnw1_restr lnw1_adj lnw1_nap lnw1_jnap lnw1_totw FEj wh1 lnw1_wins_restr lnw1_wins_adj lnw1_wins_nap lnw1_wins_jnap lnw1_wins_totw FEj_wins {
	rename (`X' `X'_td1y `X'_td2y) (`X'_t1 `X'_t2 `X'_t3)
}

drop n t nap jnap vallazon ev w1 w1_wins fogvisz1 transfer_ny transfer_mtp SLdays feor1_h2 feor1_2003_4 feor1_2008_4 defl w1_defl w1_wins_defl EMP rel_time  ho w_nap w_jnap w_wins_nap w_wins_jnap FO mean_tfp_wr

* add firm fixed effect and main wage variable in the months after the accident - m1 m2 m3 ... m12

* Twelve copies per row, covering months 1-12 after tmerge_t, or after
* maxt_clue when maxt_clue is not missing.
gen n=_n
expand 12
egen t=seq(), by(n)
replace t=tmerge_t+t if maxt_clue==.
replace t=maxt_clue+t if maxt_clue!=.

* monthly employment record, firm FE and SLdays for each expanded month
merge m:1 anon t using "$in/admin3_alap.dta", keep(master match) nogen keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1 )
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keep(master match) nogen keepusing(vall_fix)
rename vall_fix FEj
merge 1:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 SLdays

* EMP is 1 (missing otherwise) when the row has a firm id, a positive wage
* or positive hours, SLdays below 6 or missing, and fogvisz1 equal to 201.
gen EMP=1 if (vallazon!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) & (SLdays<6 | SLdays==.) & (fogvisz1==201)

* Winsorized firm FE: 1st and 99th percentiles over distinct
* (vallazon, FEj) pairs seen with EMP==1 in this expanded sample. Values of
* exactly 0 are not raised; missing values are not lowered.
preserve
keep if EMP==1
keep vallazon FEj
duplicates drop
gen FEj_wins=FEj
egen FEj1=pctile(FEj), p(1)
egen FEj99=pctile(FEj), p(99)
replace FEj_wins=FEj1 if FEj_wins<FEj1 & FEj_wins!=0 
replace FEj_wins=FEj99 if FEj_wins>FEj99 & FEj_wins!=. 
drop FEj1 FEj99
keep vallazon FEj_wins
tempfile FEjwins
save `FEjwins'
restore
* firms never seen with EMP==1 get a missing FEj_wins
merge m:1 vallazon using `FEjwins'
drop if _merge==2
drop _merge


* create a winsorized version of the wage variable, only for the EMP=1
* (percentiles by month t; a wage of exactly 0 is never raised)
gen w1_wins=w1
egen wp1=pctile(w1) if EMP==1, by(t) p(1)
egen wp99=pctile(w1) if EMP==1, by(t) p(99)
replace w1_wins=wp1 if w1_wins<wp1 & w1_wins!=0 & EMP==1
replace w1_wins=wp99 if w1_wins>wp99 & w1_wins!=. & EMP==1
drop wp1 wp99

* Calendar year of each expanded month t.
gen ev = 2003 + int((t-1)/12)

* Yearly deflator with 2003 = 1; the list runs from 2003 to 2017 in order,
* and years outside that range leave defl missing.
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev == `yr'
	local ++yr
}

* censoring hours worked to the 20-40 range (missing hours stay missing)
replace wh1 = 40 if wh1 > 40 & wh1 != .
replace wh1 = 20 if wh1 < 20

* Log deflated wage over wh1*30/7, raw and winsorized. Left missing when
* transfer_mtp is 401 or transfer_ny is 203, and blanked when EMP is not 1.
foreach s in "" _wins {
	gen lnw1`s'_restr = log((w1`s'/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	replace lnw1`s'_restr = . if EMP != 1
}
drop EMP w1 w1_wins wh1 transfer_ny transfer_mtp vallazon nap jnap fogvisz1 SLdays ev defl

* months since tmerge_t, or since maxt_clue when it is not missing (1-12)
gen time = cond(maxt_clue == ., t - tmerge_t, t - maxt_clue)
drop t

* one row per n again; the month number becomes an _m suffix
reshape wide FEj FEj_wins lnw1_restr lnw1_wins_restr, i(n) j(time)
forvalues m = 1/12 {
	rename (FEj`m' lnw1_restr`m' FEj_wins`m' lnw1_wins_restr`m') ///
		(FEj_m`m' lnw1_restr_m`m' FEj_wins_m`m' lnw1_wins_restr_m`m')
}

* Final clean-up: restore the original month under the name t, add the
* calendar year of tmerge_t, shorten the 4-digit occupation names, put the
* variables in their documented order, summarize and save.
rename t_pre t

* calendar year of tmerge_t (month 1 falls in 2003)
gen ev=2003+int((tmerge_t-1)/12)

drop n

* feor1_2003_4_t and feor1_2008_4_t become feor1_2003_t and feor1_2008_t
rename feor1_*_4_t feor1_*_t

order anon tmerge_t ev t d maxt_clue mint_clu flagEMPml1 flagEMPm0 diff_year ABS HS treated control ferfi kor vallazon_t vallazon_t1 vallazon_t2 vallazon_t3 w1_t w1_wins_t wh1_t fogvisz1_t occupation teaor1_h1 logmean_w_pre_imp FO_pre_imp size_pre_imp flag_missingsize_pre flag_missingFO_pre flag_missinglogmean_w_pre FEi_imp flag_missingFEi mean_lag_w FEj_l*_imp FEj_wins_l*_imp flag_missingFEj_l* A_12mo B_12mo C_12mo D_12mo G_12mo H_12mo J_12mo L_12mo M_12mo N_12mo P_12mo R_12mo S_12mo V_12mo log_health_12mo lnw1_restr_t0 lnw1_restr_t1 lnw1_restr_t2 lnw1_restr_t3 lnw1_wins_restr_t0 lnw1_wins_restr_t1 lnw1_wins_restr_t2 lnw1_wins_restr_t3 FEj_t0 FEj_t1 FEj_t2 FEj_t3 FEj_wins_t0 FEj_wins_t1 FEj_wins_t2 FEj_wins_t3 mean_tfp_wr_pre_imp flag_missingmean_tfp_wr_pre feor1_h2_t feor1_h2_t1 feor1_h2_t2 feor1_h2_t3 feor1_2003_t feor1_2008_t feor1_2003_t1 feor1_2008_t1 feor1_2003_t2 feor1_2008_t2 feor1_2003_t3 feor1_2008_t3 FO_t FO_t1 FO_t2 FO_t3 mean_tfp_wr_t mean_tfp_wr_t1 mean_tfp_wr_t2 mean_tfp_wr_t3 wh1_t0 wh1_t1 wh1_t2 wh1_t3 lnw1_totw_t0 lnw1_totw_t1 lnw1_totw_t2 lnw1_totw_t3 lnw1_adj_t0 lnw1_adj_t1 lnw1_adj_t2 lnw1_adj_t3 lnw1_nap_t0 lnw1_nap_t1 lnw1_nap_t2 lnw1_nap_t3 lnw1_jnap_t0 lnw1_jnap_t1 lnw1_jnap_t2 lnw1_jnap_t3 lnw1_wins_totw_t0 lnw1_wins_totw_t1 lnw1_wins_totw_t2 lnw1_wins_totw_t3 lnw1_wins_adj_t0 lnw1_wins_adj_t1 lnw1_wins_adj_t2 lnw1_wins_adj_t3 lnw1_wins_nap_t0 lnw1_wins_nap_t1 lnw1_wins_nap_t2 lnw1_wins_nap_t3 lnw1_wins_jnap_t0 lnw1_wins_jnap_t1 lnw1_wins_jnap_t2 lnw1_wins_jnap_t3 FEj_m* lnw1_restr_m* FEj_wins_m* lnw1_wins_restr_m*	
	
sum
* NOTE(review): there is no replace option, so this stops with an error
* when the file already exists - confirm that this is the intended safeguard.
save "$out/data_for_main_regs_20231122.dta"


/*
VARIABLE DEFINITIONS:
anon            person
tmerge_t        month of the shock 
ev		year of the accident
t		month of merging firm-level data in m=0 or m=-1 (=tmerge_t if flagEMPm0==1; =tmerge_t-1 if flagEMPml1==1)
d		person is EMP for d months after the (end of the clustered) accident for the first time
maxt_clue       last month of the clustered accident (t in which m=0 for the last time)
mint_clu        first month of the clustered accident (t in which m=0 for the first time)
flagEMPml1      indicator for treated being EMP in one month before the accident
flagEMPm0       indicator for treated being EMP in the month of the accident
diff_year	indicator for returning in the year after the shock
ABS             person has an absence spell of at least 2 months after t (can be one for the controls as well)
HS              has health shock 
treated		indicator for person having an accident + absence of 3-11 months
control		indicator for person having no accident (might or might not have absence)
ferfi           male
kor             age in t
vallazon_t 	firm id in t (or t-1 if treated only EMP=1 in t-1)
w1_t 		wage in t (or t-1 if treated only EMP=1 in t-1)
wh1_t 		hours worked in t (or t-1 if treated only EMP=1 in t-1)
fogvisz1_t 	type of contract in t (or t-1 if treated only EMP=1 in t-1)
feor1_h2_t 	2-digit harmonized occupation code in t (or t-1 if treated only EMP=1 in t-1)
occupation	occupation category in t (or t-1 if treated only EMP=1 in t-1)
teaor1_h1 	industry in t (or t-1 if treated only EMP=1 in t-1)
feor1_2003_4_t 	pre-2008 4-digit occupation code in t (or t-1 if treated only EMP=1 in t-1)
feor1_2008_4_t	post-2008 4-digit occupation code in t (or t-1 if treated only EMP=1 in t-1)
logmean_w_pre_imp	average wage of firm in which being employed in t (or t-1 if treated only EMP=1 in t-1), imputed with population mean if missing
FO_pre_imp		firm foreign in which being employed in t (or t-1 if treated only EMP=1 in t-1), imputed with population mean if missing
size_pre_imp	size of firm in which being employed in t (or t-1 if treated only EMP=1 in t-1), imputed with population mean if missing
flag_missingsize_pre 	missing size of firm in which being employed in t (or t-1 if treated only EMP=1 in t-1)
flag_missingFO_pre 	missing foreign firm in which being employed in t (or t-1 if treated only EMP=1 in t-1)	
flag_missinglogmean_w_pre	missing average wage of firm in which being employed in t (or t-1 if treated only EMP=1 in t-1)
FEi_imp			person FE from AKM, imputed with population mean if missing
flag_missingFEi	indicator for missing person FE from AKM
lnw1_restr_t0	pre-acc (l1-l3 avg) monthly wage deflated and corrected for hours worked (=log((w1_t/defl)/((wh1_t*30)/7)))
FEj_t0		pre-acc (l1-l3 avg) firm FE 
mean_lag_w	monthly wage deflated and corrected for hours worked, average of 4-12 months pre-acc (=log((w1_t/defl)/((wh1_t*30)/7)))
FEj_l*_imp	lagged firm FE, imputed with population mean if missing
flag_missingFEj_l*	indicator for originally missing lagged firm FE 
A_12mo B_12mo C_12mo D_12mo G_12mo H_12mo J_12mo L_12mo M_12mo N_12mo P_12mo R_12mo S_12mo V_12mo	if drug cat 3-12 months pre-acc
log_health_12mo	log of total health spending 3-12 months before the accident
lnw1_restr_t1	monthly wage deflated and corrected for hours worked in t+d (=log((w1_t/defl)/((wh1_t*30)/7)))
FEj_t1		firm FE in t+d
lnw1_restr_t2	monthly wage deflated and corrected for hours worked in t+d+12 (avg +6...+18) (=log((w1_t/defl)/((wh1_t*30)/7)))
lnw1_restr_t3	monthly wage deflated and corrected for hours worked in t+d+24 (avg +18...+30) (=log((w1_t/defl)/((wh1_t*30)/7)))
FEj_t2		firm FE in t+d+12 (avg +6...+18)
FEj_t3		firm FE in t+d+24 (avg +18...+30)
feor1_2003_t1	pre-2008 4-digit occupation code in t+d 
feor1_2008_t1	post-2008 4-digit occupation code in t+d 

* ADDITIONAL VARIABLES IN THIS VERSION

mean_tfp_wr_pre_imp	firm TFP in which being employed in t (or t-1 if treated only EMP=1 in t-1), imputed with population mean if missing
flag_missingmean_tfp_wr_pre	missing firm TFP in which being employed in t (or t-1 if treated only EMP=1 in t-1)
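feor1_2003_t2	pre-2008 4-digit occupation code in t+d+12 (if missing: nearest available value 2, 4 or 6 months before/after) 
feor1_2008_t2	post-2008 4-digit occupation code in t+d+12 (if missing: nearest available value 2, 4 or 6 months before/after) 
feor1_2003_t3	pre-2008 4-digit occupation code in t+d+24 (if missing: nearest available value 2, 4 or 6 months before/after)
feor1_2008_t3	post-2008 4-digit occupation code in t+d+24 (if missing: nearest available value 2, 4 or 6 months before/after)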
FO_t		firm foreign in t (or t-1 if treated only EMP=1 in t-1)
mean_tfp_wr_t	firm TFP in t (or t-1 if treated only EMP=1 in t-1)
FO_t1		firm foreign in t+d
mean_tfp_wr_t1	firm TFP in t+d
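FO_t2		firm foreign in t+d+12 (if missing: nearest available value 2, 4 or 6 months before/after) 
mean_tfp_wr_t2	firm TFP in t+d+12 (if missing: nearest available value 2, 4 or 6 months before/after) 
FO_t3		firm foreign in t+d+24 (if missing: nearest available value 2, 4 or 6 months before/after)
mean_tfp_wr_t3	firm TFP in t+d+24 (if missing: nearest available value 2, 4 or 6 months before/after)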
wh1_t0		pre-acc (l1-l3 avg) hours worked 
wh1_t1		hours worked in t+d
wh1_t2		hours worked in t+d+12 (avg +6...+18) 
wh1_t3		hours worked in t+d+24 (avg +18...+30)
lnw1_totw_t0	pre-acc (l1-l3 avg) monthly wage (=w1 deflated)
lnw1_totw_t1	monthly wage in t+d (=w1 deflated)
lnw1_totw_t2	monthly wage in t+d+12 (avg +6...+18) (=w1 deflated)
lnw1_totw_t3	monthly wage in t+d+24 (avg +18...+30) (=w1 deflated)
lnw1_adj_t0	pre-acc (l1-l3 avg) hourly wage adjusted for actual days being insured that month (=(w1/(nap/days_of_month)/((wh1_t*30)/7) deflated)	
lnw1_adj_t1	hourly wage in t+d adjusted for actual days being insured that month (=(w1/(nap/days_of_month)/((wh1_t*30)/7) deflated)
lnw1_adj_t2	hourly wage in t+d+12 (avg +6...+18) adjusted for actual days being insured that month	(=(w1/(nap/days_of_month)/((wh1_t*30)/7) deflated)
lnw1_adj_t3	hourly wage in t+d+24 (avg +18...+30) adjusted for actual days being insured that month	(=(w1/(nap/days_of_month)/((wh1_t*30)/7) deflated)
lnw1_nap_t0	pre-acc (l1-l3 avg) monthly wage adjusted for actual days being insured that month (=(w1/(nap/days_of_month) deflated)
lnw1_nap_t1 	monthly wage in t+d adjusted for actual days being insured that month (=(w1/(nap/days_of_month) deflated)
lnw1_nap_t2	monthly wage in t+d+12 (avg +6...+18) adjusted for actual days being insured that month (=(w1/(nap/days_of_month) deflated)
lnw1_nap_t3 	monthly wage in t+d+24 (avg +18...+30) adjusted for actual days being insured that month (=(w1/(nap/days_of_month) deflated)
lnw1_jnap_t0	pre-acc (l1-l3 avg) monthly wage adjusted for actual days when receiving income that month (=(w1/(jnap/days_of_month) deflated)
lnw1_jnap_t1	monthly wage in t+d adjusted for actual days when receiving income that month (=(w1/(jnap/days_of_month) deflated)
lnw1_jnap_t2	monthly wage in t+d+12 (avg +6...+18) adjusted for actual days when receiving income that month (=(w1/(jnap/days_of_month) deflated)
lnw1_jnap_t3	monthly wage in t+d+24 (avg +18...+30) adjusted for actual days when receiving income that month (=(w1/(jnap/days_of_month) deflated)
FEj_m*		firm FE in t* (i.e. * months after the accident or its last month if it is a clustered shock)
lnw1_restr_m*	monthly wage deflated and corrected for hours worked in t* (i.e. * months after the accident or its last month if it is a clustered shock) (=log((w1_t/defl)/((wh1_t*30)/7)))
feor1_h2_t1	2-digit harmonized occupation code in t+d 
feor1_h2_t2	2-digit harmonized occupation code in t+d+12
feor1_h2_t3	2-digit harmonized occupation code in t+d+24
vallazon_t1	firm id in t+d
vallazon_t2	firm id in t+d+12
vallazon_t3	firm id in t+d+24

comparing the following variables with the old (baseline) version:
anon tmerge_t t maxt_clue flagEMPml1 d ABS HS mint_clu flagEMPm0 ferfi kor occupation treated control diff_year vallazon_t w1_t wh1_t fogvisz1_t feor1_h2_t teaor1_h1 flag_missing* FEj_l*_imp logmean_w_* FO_pre_imp size_pre_imp mean_lag_w log_health* ?_12mo lnw1_restr* FEj_t* FEi_imp ev
*/



